# coding: utf-8
from config import setting
from nltk import FreqDist


# Write, for every article, its 30 most frequent tokens together with counts.
#
# Input  (setting.MODEL_DIR + '/article_seg_with_content_id'), one article per
#        line: "<content_id>,<tokens separated by single spaces>"
# Output (./article_top_words), one article per line:
#        "<content_id>,<word>/<count> <word>/<count> ..."
#
# Both handles live in one `with` statement so the output file is flushed and
# closed even if reading or parsing raises part-way through. The output file
# is still opened first, as before.
with open('./article_top_words', 'w+', encoding='utf-8') as f_write, \
        open(setting.MODEL_DIR + '/article_seg_with_content_id', 'r', encoding='utf-8') as f_read:
    # Stream the file line by line rather than materializing it via readlines().
    for line in f_read:
        # Drop the line terminator; otherwise it stays glued to the last token,
        # is counted as a distinct word and can inject a raw newline into the
        # output record.
        line = line.rstrip('\n')
        if not line:
            # A blank line carries no article; skip it instead of failing on
            # the unpack below.
            continue
        # Only the first comma separates the id from the content; the
        # segmented text may itself contain commas. A line with no comma at
        # all still raises ValueError, so malformed input is not hidden.
        content_id, content_seg = line.split(',', 1)
        # Consecutive spaces produce empty strings; do not count them as words.
        tokens = [token for token in content_seg.split(' ') if token]
        freq = FreqDist(tokens)
        article_word_list = [
            '{}/{}'.format(word, count)
            for word, count in freq.most_common(n=30)
        ]
        f_write.write('{},{}\n'.format(content_id, ' '.join(article_word_list)))